Minimal QC recommended: Current best practices in single‐cell RNA‐seq analysis: a tutorial Orchestrating Single-cell Analysis ### Compiling Issues This document had trouble compiling via knitr and pandoc. The following thread fixed the issue (https://github.com/rstudio/rstudio/issues/3661).
seu <- readRDS(paste0(data_dir, "fetal_assigned_res.0.3_2020-12-16.rda"))
DimPlot(seu, group.by = 'seurat_clusters', label = T, pt.size = .25)
Split into maternal versus fetal subsets.
split <- SplitObject(seu, split.by = 'fetal')
rm(seu) # Clean memory
fetal <- split$Fetal
maternal <- split$Maternal
rm(split) # Clean memory
DimPlot(fetal) + ggtitle("Fetal")
DimPlot(maternal) + ggtitle("Maternal")
Custom function to run fastMNN pipe with
pipe.fast.mnn <- function(seu, batch) {
seu <- RunFastMNN(object.list = SplitObject(seu, split.by = batch), verbose = T)
#seu@tools$RunFastMNN@metadata$merge.info$lost.var %>% print
seu <- RunUMAP(seu, reduction = "mnn", dims = 1:30)
seu <- FindNeighbors(seu, reduction = "mnn", dims = 1:30)
seu <- FindClusters(seu, res = 0.2)
return(seu)
}
fetal <- pipe.fast.mnn(fetal, "biorep")
## Computing 2000 integration features
## No variable features found for object1 in the object.list. Running FindVariableFeatures ...
## No variable features found for object2 in the object.list. Running FindVariableFeatures ...
## No variable features found for object3 in the object.list. Running FindVariableFeatures ...
## No variable features found for object4 in the object.list. Running FindVariableFeatures ...
## No variable features found for object5 in the object.list. Running FindVariableFeatures ...
## Warning: Keys should be one or more alphanumeric characters followed by an
## underscore, setting key from mnn.reconstructed_ to mnnreconstructed_
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 22:41:46 UMAP embedding parameters a = 0.9922 b = 1.112
## 22:41:46 Read 10387 rows and found 30 numeric columns
## 22:41:46 Using Annoy for neighbor search, n_neighbors = 30
## 22:41:46 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 22:41:47 Writing NN index file to temp file C:\Users\Kyle\AppData\Local\Temp\RtmpoNUwI8\file2e6857fb110a
## 22:41:47 Searching Annoy index using 1 thread, search_k = 3000
## 22:41:48 Annoy recall = 100%
## 22:41:49 Commencing smooth kNN distance calibration using 1 thread
## 22:41:50 Initializing from normalized Laplacian + noise
## 22:41:51 Commencing optimization for 200 epochs, with 463062 positive edges
## 22:42:01 Optimization finished
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10387
## Number of edges: 421557
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9711
## Number of communities: 13
## Elapsed time: 0 seconds
DimPlot(fetal, group.by = "seurat_clusters", label = T, pt.size =.25)
# Function that accepts Seurat that has been processed up to the clustering step, clusters at desired resolutions (vector), adds cluster identities at different resolutions, and returns Seurat object with resolution cluster identities
Seurat_clustree <- function (seuratObject, resolutions) {
for(hyperparameter in resolutions) {
print(hyperparameter)
prefix <- paste0("res.", hyperparameter)
print(prefix)
seuratObject <- FindClusters(object = seuratObject, resolution = hyperparameter)
seuratObject <- AddMetaData(object = seuratObject, metadata = seuratObject$seurat_clusters, col.name = prefix)
}
return(seuratObject)
}
resolutions <- seq(from = 0.1, to = 0.8, by = .1)
fetal <- Seurat_clustree(fetal, resolutions)
## [1] 0.1
## [1] "res.0.1"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10387
## Number of edges: 421557
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9828
## Number of communities: 11
## Elapsed time: 0 seconds
## [1] 0.2
## [1] "res.0.2"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10387
## Number of edges: 421557
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9711
## Number of communities: 13
## Elapsed time: 0 seconds
## [1] 0.3
## [1] "res.0.3"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10387
## Number of edges: 421557
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9597
## Number of communities: 15
## Elapsed time: 0 seconds
## [1] 0.4
## [1] "res.0.4"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10387
## Number of edges: 421557
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9488
## Number of communities: 16
## Elapsed time: 0 seconds
## [1] 0.5
## [1] "res.0.5"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10387
## Number of edges: 421557
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9389
## Number of communities: 17
## Elapsed time: 0 seconds
## [1] 0.6
## [1] "res.0.6"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10387
## Number of edges: 421557
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9290
## Number of communities: 18
## Elapsed time: 0 seconds
## [1] 0.7
## [1] "res.0.7"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10387
## Number of edges: 421557
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9197
## Number of communities: 19
## Elapsed time: 0 seconds
## [1] 0.8
## [1] "res.0.8"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10387
## Number of edges: 421557
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9106
## Number of communities: 19
## Elapsed time: 0 seconds
Iterating over 0.2 to 0.7 clustering resolution, .3 looks stable. Additional divisions at .5
clustree(fetal, prefix = "res.", node_colour = "sc3_stability") + theme(legend.position = "bottom") + guides(edge_alpha = F)
## Warning: The `add` argument of `group_by()` is deprecated as of dplyr 1.0.0.
## Please use the `.add` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
Resolution 0.1 to 0.2 just changes cell assignment some. From 0.2 to 0.3, makes 2 more splits by large cytotrophoblast cluster, likely by sex, more thoroughly explored below. Set default resolution to 0.2.
DimPlot(fetal, group.by = 'res.0.1', label = T, pt.size = .25)
DimPlot(fetal, group.by = 'res.0.2', label = T, pt.size = .25)
DimPlot(fetal, group.by = 'res.0.3', label = T, pt.size = .25)
DimPlot(fetal, group.by = 'res.0.4', label = T, pt.size = .25)
# Set default to 0.2
fetal <- FindClusters(fetal, res = 0.2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10387
## Number of edges: 421557
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9711
## Number of communities: 13
## Elapsed time: 0 seconds
DimPlot(fetal, group.by = 'biorep', label = F, pt.size = .25)
#fetal.markers.res.0.2 <- FindAllMarkers(fetal)
#ct.diff <- FindMarkers(fetal, ident.1 = "7", ident.2 = "0", min.pct = 0, logfc.threshold = 0, only.pos = F, min.cells.feature = 1, min.cells.group = 1)
#ct.markers <- FindMarkers(fetal, ident.1 = "7", ident.2 = "0")
#ct.markers %>%
# arrange(desc(avg_logFC)) %>%
# View()
#ct.diff %>% rownames_to_column(var = "gene") %>%
# filter(gene %in% c("PAGE4", "XIST", "DDX3X", "E1F1AX", "TOP2A", "PCNA", "MKI67"))
Trying to tease apart proliferative CTs at the ST surface from interstitial CTs. Pique-regi et al. used sex-specific transcripts that may have been counfounded by fetal sex. Let’s look some more canonical markers from “Expression of the proliferation markers Ki67 and transferrin receptor by human trophoblast populations (1988)”. Ki67 very highly expressed in cluster 7 compared to 0 and is a marker of CTs near the ST interace. Transferrin, however, was deteced only in a couple cells at very low level. In addition, a differential expression test between the 2 clusters revealed very high expression of PCNA, a cofactor of DNA polymerase delta and TOP2A, DNA Topoisomerase II Alpha.
# Cytotrophoblasts
FeaturePlot(object = fetal,
features = c("XIST", "PAGE4"),
cols = c("lightgrey", "blue"),
pt.size = .1)
FeaturePlot(object = fetal,
features = c("EIF1AX", "DDX3X"),
cols = c("lightgrey", "blue"),
pt.size = .1)
FeaturePlot(object = fetal,
features = c("MKI67", "TF"),
cols = c("lightgrey", "blue"),
pt.size = .1)
FeaturePlot(object = fetal,
features = c("PCNA", "TOP2A"),
cols = c("lightgrey", "blue"),
pt.size = .1)
AverageExpression(fetal, features = c("PAGE4", "XIST", "DDX3X", "EIF1AX"))
## Warning: The following 3 features were not found in the mnn.reconstructed assay:
## XIST, DDX3X, EIF1AX
## $RNA
## kc.40.1 kc.40.2 kc.42.1 kc.42.2 pr.478 pr.481
## PAGE4 0.4333043 0.2254967 0.12622167 0.01689603 37.615179261 79.372876
## XIST 1.9868826 2.0671099 0.07328827 0.06445954 0.007298265 2.309304
## DDX3X 2.7307284 2.4783421 1.63605487 1.71321785 2.381785534 3.098342
## EIF1AX 2.1550887 2.3502178 1.37376425 1.39150183 1.561301655 1.916287
## pr.484
## PAGE4 70.79850047
## XIST 0.00189618
## DDX3X 1.62982322
## EIF1AX 1.57120550
##
## $mnn.reconstructed
## kc.40.1 kc.40.2 kc.42.1 kc.42.2 pr.478 pr.481
## [1,] -0.05655722 -0.05644057 -0.05739824 -0.05736874 -0.05080359 -0.0327245
## pr.484
## [1,] -0.0378834
AverageExpression(fetal, features = c("PAGE4", "TOP2A", "PCNA", "MKI67", "TF"))
## Warning: The following 1 features were not found in the mnn.reconstructed assay:
## TF
## $RNA
## kc.40.1 kc.40.2 kc.42.1 kc.42.2 pr.478 pr.481
## PAGE4 0.433304344 0.22549671 0.12622167 0.016896026 37.61517926 79.372875634
## TOP2A 0.014936628 0.02480205 0.03187013 0.013994787 0.34534881 0.370580598
## PCNA 0.327434670 0.34165433 0.33507899 0.291503487 0.75763506 0.756721144
## MKI67 0.007064542 0.01125905 0.02860220 0.007716447 0.14351264 0.108884084
## TF 0.036603825 0.03770834 0.02943784 0.021056200 0.01422366 0.006968874
## pr.484
## PAGE4 70.79850047
## TOP2A 0.55319402
## PCNA 0.97512383
## MKI67 0.20243591
## TF 0.01001539
##
## $mnn.reconstructed
## kc.40.1 kc.40.2 kc.42.1 kc.42.2 pr.478
## PAGE4 -0.056557220 -0.056440571 -0.057398240 -0.057368740 -0.050803589
## TOP2A -0.002602547 -0.002656029 -0.002522846 -0.002583248 0.002036148
## PCNA -0.003299982 -0.003280327 -0.003527818 -0.003740722 0.001904908
## MKI67 -0.001280102 -0.001354702 -0.001323286 -0.001389995 0.001324334
## pr.481 pr.484
## PAGE4 -0.032724497 -0.037883399
## TOP2A 0.002095300 0.004182101
## PCNA 0.003069453 0.004437753
## MKI67 0.001150792 0.002449169
A small sex difference is clear in the KC data. A very large sex difference is apparent in the PR data.
split <- SplitObject(fetal, split.by = 'batch')
kc <- split$kc
DimPlot(kc, group.by = "seurat_clusters", label = T, pt.size =.25) + NoLegend()
DimPlot(kc, group.by = "orig.ident", label = F, pt.size =.25)
pr <- split$pr
DimPlot(pr, group.by = "seurat_clusters", label = T, pt.size =.25) + NoLegend()
DimPlot(pr, group.by = "orig.ident", label = F, pt.size =.25)
rm(split)
Key gene expression markers
# Cytotrophoblasts
FeaturePlot(object = fetal,
features = c("KRT8", "PAGE4"),
cols = c("lightgrey", "blue"),
pt.size = .1)
# Fibroblasts
FeaturePlot(object = fetal,
features = c("COL1A1"),
cols = c("lightgrey", "blue"),
pt.size = .1)
# Monocyte (and potential CD1C+ Dendritic Cell subset)
FeaturePlot(object = fetal,
features = c("CD1C", "CD14"),
cols = c("lightgrey", "blue"),
pt.size = .1)
FeaturePlot(object = fetal,
features = c("CD1C", "CLEC9A"),
cols = c("lightgrey", "blue"),
pt.size = .1)
# FCGR3A+ Monocytes
FeaturePlot(object = fetal,
features = c("FCGR3A", "MS4A7"),
cols = c("lightgrey", "blue"),
pt.size = .1)
# Hofbauer
FeaturePlot(object = fetal,
features = c("CD163", "CD14"),
cols = c("lightgrey", "blue"),
pt.size = .1)
FeaturePlot(object = fetal,
features = c("LYVE1"),
cols = c("lightgrey", "blue"),
pt.size = .1)
# B Cells
FeaturePlot(object = fetal,
features = c("CD79A"),
cols = c("lightgrey", "blue"),
pt.size = .1)
# Extravillous Trophoblast (EVT)
FeaturePlot(object = fetal,
features = c("MMP2", "HLA-G"),
cols = c("lightgrey", "blue"),
pt.size = .1)
# Endothelial
FeaturePlot(object = fetal,
features = c("KDR", "ESAM"),
cols = c("lightgrey", "blue"),
pt.size = .1)
# pDC ref: "Single-cell RNA-seq reveals new types of human blood dendritic cells, monocytes and progenitors"
# Looks like pDC are actually off the tail of
FeaturePlot(object = fetal,
features = c("HLA-DRA"),
cols = c("lightgrey", "blue"),
pt.size = .1)
FeaturePlot(object = fetal,
features = c("CLEC4C", "IL3RA"),
cols = c("lightgrey", "blue"),
pt.size = .1)
FeaturePlot(object = fetal,
features = c("GZMB", "SERPINF1"),
cols = c("lightgrey", "blue"),
pt.size = .1)
FeaturePlot(object = fetal,
features = c("IL3RA"),
cols = c("lightgrey", "blue"),
pt.size = .1)
# DC 1, unexpectedly, no CLEC9A expression, which is supposed to clearly delineate a THBD+ subset, but shows up in the small cluster thought to be pDC
FeaturePlot(object = fetal,
features = c("THBD", "CLEC9A"),
cols = c("lightgrey", "blue"),
pt.size = .1)
# RBCs
FeaturePlot(object = fetal,
features = c("HBB", "HBG2"),
cols = c("lightgrey", "blue"),
pt.size = .1)
monocytes <- subset(fetal, idents = c("4", "10"))
monocytes <- pipe.fast.mnn(monocytes, batch = "biorep")
## Computing 2000 integration features
## No variable features found for object1 in the object.list. Running FindVariableFeatures ...
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -2.357
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.30103
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 9.8368e-015
## No variable features found for object2 in the object.list. Running FindVariableFeatures ...
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -2.3107
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.30103
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 3.8325e-015
## No variable features found for object3 in the object.list. Running FindVariableFeatures ...
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -1.934
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.32121
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 6.0675e-028
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090619
## No variable features found for object4 in the object.list. Running FindVariableFeatures ...
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -1.7181
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3202
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 8.4835e-028
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.031008
## No variable features found for object5 in the object.list. Running FindVariableFeatures ...
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : at -1.7266
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : radius 0.00036212
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -1.7266
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.01903
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090619
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : zero-width neighborhood. make span bigger
## Warning: Keys should be one or more alphanumeric characters followed by an
## underscore, setting key from mnn.reconstructed_ to mnnreconstructed_
## 22:42:54 UMAP embedding parameters a = 0.9922 b = 1.112
## 22:42:54 Read 1047 rows and found 30 numeric columns
## 22:42:54 Using Annoy for neighbor search, n_neighbors = 30
## 22:42:54 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 22:42:54 Writing NN index file to temp file C:\Users\Kyle\AppData\Local\Temp\RtmpoNUwI8\file2e684db22026
## 22:42:54 Searching Annoy index using 1 thread, search_k = 3000
## 22:42:55 Annoy recall = 100%
## 22:42:55 Commencing smooth kNN distance calibration using 1 thread
## 22:42:56 Initializing from normalized Laplacian + noise
## 22:42:56 Commencing optimization for 500 epochs, with 43180 positive edges
## 22:42:59 Optimization finished
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1047
## Number of edges: 44217
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8680
## Number of communities: 3
## Elapsed time: 0 seconds
monocytes <- Seurat_clustree(monocytes, seq(0.1, 0.8, by = 0.1))
## [1] 0.1
## [1] "res.0.1"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1047
## Number of edges: 44217
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9220
## Number of communities: 2
## Elapsed time: 0 seconds
## [1] 0.2
## [1] "res.0.2"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1047
## Number of edges: 44217
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8680
## Number of communities: 3
## Elapsed time: 0 seconds
## [1] 0.3
## [1] "res.0.3"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1047
## Number of edges: 44217
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8331
## Number of communities: 5
## Elapsed time: 0 seconds
## [1] 0.4
## [1] "res.0.4"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1047
## Number of edges: 44217
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8049
## Number of communities: 6
## Elapsed time: 0 seconds
## [1] 0.5
## [1] "res.0.5"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1047
## Number of edges: 44217
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7780
## Number of communities: 6
## Elapsed time: 0 seconds
## [1] 0.6
## [1] "res.0.6"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1047
## Number of edges: 44217
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7532
## Number of communities: 6
## Elapsed time: 0 seconds
## [1] 0.7
## [1] "res.0.7"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1047
## Number of edges: 44217
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7310
## Number of communities: 7
## Elapsed time: 0 seconds
## [1] 0.8
## [1] "res.0.8"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1047
## Number of edges: 44217
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7096
## Number of communities: 7
## Elapsed time: 0 seconds
clustree(monocytes, prefix = "res.", node_colour = "sc3_stability") + theme(legend.position = "bottom") + guides(edge_alpha = F)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
monocytes <- FindClusters(monocytes, resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1047
## Number of edges: 44217
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8331
## Number of communities: 5
## Elapsed time: 0 seconds
DimPlot(monocytes, group.by = 'seurat_clusters', label = T, pt.size = .25)
#monocytes.markers <- FindAllMarkers(monocytes)
“Single-cell RNA-seq reveals new types of human blood dendritic cells, monocytes and progenitors” for marker genes for the small monocyte population. pDC cluster may contain the “new” DC5 population from this paper, but leaving as pDC.
# FCGR3A+ Monocytes not well separated by clustering, leave as CD14+ monocytes
FeaturePlot(object = monocytes,
features = c("FCGR3A", "CD14"),
cols = c("lightgrey", "blue"),
pt.size = .1)
# Dendritic cell from Seurat Guided Clustering Vignette
FeaturePlot(object = monocytes,
features = c("FCER1A", "CST3"),
cols = c("lightgrey", "blue"),
pt.size = .1)
FeaturePlot(object = monocytes,
features = c("THBD", "LYZ"),
cols = c("lightgrey", "blue"),
pt.size = .1)
FeaturePlot(object = monocytes,
features = c("VCAN", "ANXA1"),
cols = c("lightgrey", "blue"),
pt.size = .1)
# Rare population does not look like conventional dendritic based on lack of expression of CD1C and THBD
# Possibly cDC progenitor with some SEMA4D
# CD100
VlnPlot(monocytes, "SEMA4D")
# CD141
VlnPlot(monocytes, c("CD1C", "THBD"))
# DC5, new popn.
VlnPlot(monocytes, "PPP1R14A")
VlnPlot(monocytes, "CD22")
VlnPlot(monocytes, "DAB2")
# pDC markers, CD123
VlnPlot(monocytes, "IL3RA")
VlnPlot(monocytes, "ITM2C")
VlnPlot(monocytes, "SERPINF1")
VlnPlot(monocytes, "GZMB")
# Does not express pan-lymphocyte marker
VlnPlot(monocytes, "IL7R")
Using resolution of 0.3, the FCGR3A+ monocytes (and possibly the related FCER1A+ DC subtype comes with) are cluster identity 2, will not pull out because it’s such a small population.
fcgr3a.monoctyes <- WhichCells(monocytes, ident = "2")
t.cells <- subset(fetal, idents = c("1", "2"))
t.cells <- pipe.fast.mnn(t.cells, batch = "biorep")
## Computing 2000 integration features
## No variable features found for object1 in the object.list. Running FindVariableFeatures ...
## No variable features found for object2 in the object.list. Running FindVariableFeatures ...
## No variable features found for object3 in the object.list. Running FindVariableFeatures ...
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -2.6325
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.50007
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.3672e-014
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090619
## No variable features found for object4 in the object.list. Running FindVariableFeatures ...
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -2.5943
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.49973
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.348e-014
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090619
## No variable features found for object5 in the object.list. Running FindVariableFeatures ...
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -1.9931
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.32104
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 4.9719e-028
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.031008
## Warning: Keys should be one or more alphanumeric characters followed by an
## underscore, setting key from mnn.reconstructed_ to mnnreconstructed_
## 22:43:22 UMAP embedding parameters a = 0.9922 b = 1.112
## 22:43:22 Read 3181 rows and found 30 numeric columns
## 22:43:22 Using Annoy for neighbor search, n_neighbors = 30
## 22:43:22 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 22:43:22 Writing NN index file to temp file C:\Users\Kyle\AppData\Local\Temp\RtmpoNUwI8\file2e6866af4a39
## 22:43:22 Searching Annoy index using 1 thread, search_k = 3000
## 22:43:22 Annoy recall = 100%
## 22:43:23 Commencing smooth kNN distance calibration using 1 thread
## 22:43:24 Initializing from normalized Laplacian + noise
## 22:43:24 Commencing optimization for 500 epochs, with 152752 positive edges
## 22:43:32 Optimization finished
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3181
## Number of edges: 148969
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9024
## Number of communities: 3
## Elapsed time: 0 seconds
t.cells <- Seurat_clustree(t.cells, seq(0.1, 0.8, by = 0.1))
## [1] 0.1
## [1] "res.0.1"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3181
## Number of edges: 148969
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9491
## Number of communities: 2
## Elapsed time: 0 seconds
## [1] 0.2
## [1] "res.0.2"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3181
## Number of edges: 148969
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9024
## Number of communities: 3
## Elapsed time: 0 seconds
## [1] 0.3
## [1] "res.0.3"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3181
## Number of edges: 148969
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8693
## Number of communities: 4
## Elapsed time: 0 seconds
## [1] 0.4
## [1] "res.0.4"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3181
## Number of edges: 148969
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8387
## Number of communities: 5
## Elapsed time: 0 seconds
## [1] 0.5
## [1] "res.0.5"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3181
## Number of edges: 148969
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8111
## Number of communities: 6
## Elapsed time: 0 seconds
## [1] 0.6
## [1] "res.0.6"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3181
## Number of edges: 148969
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7885
## Number of communities: 6
## Elapsed time: 0 seconds
## [1] 0.7
## [1] "res.0.7"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3181
## Number of edges: 148969
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7664
## Number of communities: 6
## Elapsed time: 0 seconds
## [1] 0.8
## [1] "res.0.8"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3181
## Number of edges: 148969
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7451
## Number of communities: 7
## Elapsed time: 0 seconds
clustree(t.cells, prefix = "res.", node_colour = "sc3_stability") + theme(legend.position = "bottom") + guides(edge_alpha = F)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
t.cells <- FindClusters(t.cells, resolution = 0.8)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3181
## Number of edges: 148969
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7451
## Number of communities: 7
## Elapsed time: 0 seconds
DimPlot(t.cells, group.by = 'seurat_clusters', label = T, pt.size = .25)
#t.cells.markers <- FindAllMarkers(t.cells)
t.cells <- RenameIdents(t.cells,
"0" = "Naive CD4+ T Cells",
"1" = "CD8+ Cytotoxic T Cells",
"2" = "Naive CD8+ T Cells",
"3" = "Memory CD4+ T Cells",
"4" = "GZMK+ Natural Killer",
"5" = "Natural Killer T Cells",
"6" = "GZMB+ Natural"
)
DimPlot(t.cells, group.by = 'ident', label = T, repel = T, pt.size = .25) + NoLegend()
# saveRDS(t.cells, file = paste0(data_dir, "t_cells_subcluster", Sys.Date(), ".rda"))
# t.cells <- readRDS(paste0(data_dir, "t_cells_subcluster2020-12-21.rda"))
Differentiating CD4 from CD8 T cells https://satijalab.org/seurat/v3.1/pbmc3k_tutorial.html Seurat cell type assignment
# Naive CD4+
FeaturePlot(object = t.cells,
features = c("IL7R", "CCR7", "CD4"),
cols = c("lightgrey", "blue"),
pt.size = .1)
# T Cell naive markers, not really used
FeaturePlot(object = t.cells,
features = c("SELL", "LRRN3"),
cols = c("lightgrey", "blue"),
pt.size = .1)
# Memory CD4+
FeaturePlot(object = t.cells,
features = c("IL7R", "S100A4"),
cols = c("lightgrey", "blue"),
pt.size = .1)
# T regs do not come out clearly, not used
FeaturePlot(object = t.cells,
features = c("IL2RA", "FOXP3"),
cols = c("lightgrey", "blue"),
pt.size = .1)
# TRM/TEM markers found in some CD8+ T cells, not used
FeaturePlot(object = t.cells,
features = c("CXCR6"),
cols = c("lightgrey", "blue"),
pt.size = .1)
FeaturePlot(object = t.cells,
features = c("ITGA1"),
cols = c("lightgrey", "blue"),
pt.size = .1)
# NCAM1 = CD56, NK specific marker, but can also be expressed by T cells; KLRB1 is NK1.1 (CD161)
FeaturePlot(object = t.cells,
features = c("NCAM1", "KLRB1"),
cols = c("lightgrey", "blue"),
pt.size = .1)
# NK subtypes GZMB vs. GZMK
FeaturePlot(object = t.cells,
features = c("GZMB", "GZMK"),
cols = c("lightgrey", "blue"),
pt.size = .1)
“Single-cell transcriptomic landscape of nucleated cells in umbilical cord blood” KLRB1+ in the T cell CD8+ Cytotoxic is different from the publication. Otherwise, it seems that cluster 5 is NKT due to CD3E and NK markers and clusters 4 vs. 6 distinguish NK subtypes GZMB+ vs GZMK+.
VlnPlot(t.cells, features = "CD3E")
VlnPlot(t.cells, features = "CD4")
VlnPlot(t.cells, features = "CD8A")
VlnPlot(t.cells, features = "KLRB1")
VlnPlot(t.cells, features = c("GZMB", "GZMK"))
VlnPlot(t.cells, features = c("S100A4", "IL7R"))
# Resident Memory CD4+ T Cell Markers (https://www.rndsystems.com/product-highlights/antibodies-memory-t-cell-subset-identification), CD69 and CD103 (TIGAE)
VlnPlot(t.cells, features = c("CD69", "ITGAE"))
mesenchymal <- subset(fetal, idents = "3")
mesenchymal <- pipe.fast.mnn(mesenchymal, "biorep")
## Computing 2000 integration features
## No variable features found for object1 in the object.list. Running FindVariableFeatures ...
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -2.0344
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.49873
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.6173e-014
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090619
## No variable features found for object2 in the object.list. Running FindVariableFeatures ...
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -2.1154
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.49914
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 3.4803e-015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.22764
## No variable features found for object3 in the object.list. Running FindVariableFeatures ...
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -2.3149
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.30103
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.685e-016
## No variable features found for object4 in the object.list. Running FindVariableFeatures ...
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : at -1.4946
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : radius 0.0003054
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -1.4946
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.017476
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090619
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : zero-width neighborhood. make span bigger
## No variable features found for object5 in the object.list. Running FindVariableFeatures ...
## Warning: Keys should be one or more alphanumeric characters followed by an
## underscore, setting key from mnn.reconstructed_ to mnnreconstructed_
## 22:43:58 UMAP embedding parameters a = 0.9922 b = 1.112
## 22:43:58 Read 1232 rows and found 30 numeric columns
## 22:43:58 Using Annoy for neighbor search, n_neighbors = 30
## 22:43:58 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 22:43:58 Writing NN index file to temp file C:\Users\Kyle\AppData\Local\Temp\RtmpoNUwI8\file2e686ed03bbb
## 22:43:58 Searching Annoy index using 1 thread, search_k = 3000
## 22:43:58 Annoy recall = 100%
## 22:43:59 Commencing smooth kNN distance calibration using 1 thread
## 22:43:59 Initializing from normalized Laplacian + noise
## 22:43:59 Commencing optimization for 500 epochs, with 51812 positive edges
## 22:44:03 Optimization finished
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1232
## Number of edges: 49714
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8427
## Number of communities: 3
## Elapsed time: 0 seconds
mesenchymal <- Seurat_clustree(mesenchymal, seq(0.1, 0.8, by = 0.1))
## [1] 0.1
## [1] "res.0.1"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1232
## Number of edges: 49714
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9149
## Number of communities: 2
## Elapsed time: 0 seconds
## [1] 0.2
## [1] "res.0.2"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1232
## Number of edges: 49714
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8427
## Number of communities: 3
## Elapsed time: 0 seconds
## [1] 0.3
## [1] "res.0.3"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1232
## Number of edges: 49714
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8146
## Number of communities: 5
## Elapsed time: 0 seconds
## [1] 0.4
## [1] "res.0.4"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1232
## Number of edges: 49714
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7891
## Number of communities: 5
## Elapsed time: 0 seconds
## [1] 0.5
## [1] "res.0.5"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1232
## Number of edges: 49714
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7636
## Number of communities: 5
## Elapsed time: 0 seconds
## [1] 0.6
## [1] "res.0.6"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1232
## Number of edges: 49714
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7394
## Number of communities: 6
## Elapsed time: 0 seconds
## [1] 0.7
## [1] "res.0.7"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1232
## Number of edges: 49714
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7177
## Number of communities: 7
## Elapsed time: 0 seconds
## [1] 0.8
## [1] "res.0.8"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1232
## Number of edges: 49714
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6998
## Number of communities: 7
## Elapsed time: 0 seconds
clustree(mesenchymal, prefix = "res.", node_colour = "sc3_stability") + theme(legend.position = "bottom") +
guides(edge_alpha = F)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
mesenchymal <- FindClusters(mesenchymal, resolution = 0.1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1232
## Number of edges: 49714
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9149
## Number of communities: 2
## Elapsed time: 0 seconds
DimPlot(mesenchymal, group.by = 'seurat_clusters', label = T, pt.size = .25)
#mesenchymal.markers <- FindAllMarkers(mesenchymal)
“Fibroblasts and mesenchymal stem cells: Two sides of the same coin?” “Intrinsic multipotential mesenchymal stromal cell activity in gelatinous Heberden’s nodes in osteoarthritis at clinical presentation” Reviewing the differentially expressed genes from the above reference, it’s clear that the larger cluster is composed of mesenchymal stem cells.
mesenchymal <- RenameIdents(mesenchymal,
"0" = "Mesenchymal Stem Cells",
"1" = "Fibroblasts")
DimPlot(mesenchymal, group.by = "ident", label = T, repel = T, pt.size = .25 ) + NoLegend()
Rename fetal clusters
fetal <- RenameIdents(fetal,
"0" = "Interstitial Cytotrophoblasts",
"1" = "T Cells",
"2" = "NK/T Cells",
"3" = "Mesenchymal",
"4" = "CD14+ Monocytes",
"5" = "Hofbauer Cells",
"6" = "B Cells",
"7" = "Proliferative Cytotrophoblasts",
"8" = "Extravillous Trophoblasts",
"9" = "Syncytiotrophoblast",
"10" = "Plasmacytoid Dendritic Cells",
"11" = "Endothelial Cells",
"12" = "Nucleated Red Blood Cells",
"13" = "13")
## Warning: Cannot find identity 13
# Stash identities for later use if needed
fetal <- StashIdent(fetal, "fetal.res.0.2.cell.type.labelled")
## With Seurat 3.X, stashing identity classes can be accomplished with the following:
## fetal[["fetal.res.0.2.cell.type.labelled"]] <- Idents(object = fetal)
Overwrite coarse cell type labels with subclustering labels.
fetal <- SetIdent(fetal, cells = WhichCells(t.cells), value = Idents(t.cells))
fetal <- SetIdent(fetal, cells = WhichCells(mesenchymal), value = Idents(mesenchymal))
fetal <- StashIdent(fetal, "fetal.res.0.2.subclustered.labelled")
## With Seurat 3.X, stashing identity classes can be accomplished with the following:
## fetal[["fetal.res.0.2.subclustered.labelled"]] <- Idents(object = fetal)
DimPlot(fetal, label = T, repel = T, pt.size = .25, label.size = 3) + NoLegend()
Initial findDoubletClusters by cluster readily identified 13 as a doublet cluster. Removed and re-run.
fetal.sce <- as.SingleCellExperiment(fetal)
dbl <- findDoubletClusters(fetal.sce, clusters = fetal.sce$fetal.res.0.2.subclustered.labelled)
dbl.df <- as.data.frame(dbl)
#saveRDS(dbl.df, file = paste0(data_dir, "fetal.res.0.2.subcluster.labelled.initial.doublet.finder", Sys.Date(), ".rda"))
Try simulation approach to finding doublets. Cluster 13 clearly comes out, but so also does the tail off the B Cells population.
dbl.dens <- computeDoubletDensity(fetal.sce)
fetal$dbl.dens <- dbl.dens
FeaturePlot(fetal, features = "dbl.dens")
small <- subset(fetal, idents = c("Nucleated Red Blood Cells", "Plasmacytoid Dendritic Cells"))
FeaturePlot(object = small,
features = c("HBG2", "ITM2C"),
cols = c("lightgrey", "blue"),
pt.size = .1)
FeaturePlot(object = small,
features = c("HBG2", "CD79A"),
cols = c("lightgrey", "blue"),
pt.size = .1)
FeaturePlot(object = small,
features = c("ITM2C", "CD79A"),
cols = c("lightgrey", "blue"),
pt.size = .1)
VlnPlot(small, features = c("HBG2", "CD79A", "ITM2C"))
DimPlot(small)
dbl.dens.small <- FeaturePlot(small, features = "dbl.dens")
VlnPlot(small, features = "dbl.dens")
#pdc.b <- CellSelector(plot = dbl.dens.small)
#pdc.single.outliers <- CellSelector(plot = dbl.dens.small)
#pdc.high.dbl <- CellSelector(plot = dbl.dens.small)
#pdc.high.dbl.2 <- CellSelector(plot = dbl.dens.small)
#pdc.nrbc <- CellSelector(plot = dbl.dens.small)
#cells.to.drop <- c(pdc.b, pdc.single.outliers, pdc.high.dbl, pdc.high.dbl.2, pdc.nrbc)
#saveRDS(cells.to.drop, paste0(data_dir, "cell_selector_dropped_pdc_rbc_clusters_", Sys.Date(), ".rda"))
cells.to.drop <- readRDS(paste0(data_dir, "cell_selector_dropped_pdc_rbc_clusters_2020-12-22.rda"))
smaller <- subset(small, cells = cells.to.drop, invert = T)
DimPlot(smaller)
FeaturePlot(smaller, features = c("HBG2", "ITM2C"))
FeaturePlot(smaller, features = c("dbl.dens"))
FeaturePlot(smaller, features = c("HBG2", "DDX3Y"))
rbc.plot <- FeaturePlot(smaller, features = "HBG2")
#rbc.1 <- CellSelector(rbc.plot)
#rbc.2 <- CellSelector(rbc.plot)
smaller <- SetIdent(smaller, cells = rbc.1, value = "RBC1")
smaller <- SetIdent(smaller, cells = rbc.2, value = "RBC2")
# No apparent difference between the two RBC clusters
rbc.markers <- FindMarkers(smaller, ident.1 = "RBC1", ident.2 = "RBC2")
# Reset nRBC identities
smaller <- SetIdent(smaller, cells = rbc.1, value = "Nucleated Red Blood Cells")
smaller <- SetIdent(smaller, cells = rbc.2, value = "Nucleated Red Blood Cells")
dropped.13 <- WhichCells(fetal, idents = "13")
#saveRDS(dropped.13, file = paste0(data_dir, "ct_mesenchymal_doublet_cluster_ids_", Sys.Date(), ".rda"))
# Dropping doublets from pDC, nRBC with cells.to.drop
cells.to.drop
drop.fetal <- c(dropped.13, cells.to.drop)
fetal <- subset(fetal, cells = drop.fetal, invert = T)
fetal <- RenameIdents(fetal, "Interstitial Cytotrophoblasts" = "Cytotrophoblasts")
fetal <- StashIdent(fetal, "final_clusters")
#saveRDS(fetal, paste0(data_dir,"cleaned_fetal_seurat", Sys.Date(), ".rda"))
fetal <- readRDS(paste0(data_dir, "cleaned_fetal_seurat2020-12-22.rda"))
Rename cell type clusters with Fetal label
idents <- Idents(fetal)
cell.type <- paste0("Fetal ", idents)
Idents(fetal) <- cell.type
fetal <- StashIdent(fetal, "cell.type")
#saveRDS(fetal, paste0(data_dir, "named_cleaned_fetal_seurat_2020-12-22.rda"))
named.fetal <- readRDS(paste0(data_dir, "named_cleaned_fetal_seurat_2020-12-22.rda"))
DimPlot(named.fetal, label = T, repel = T, pt.size = .25, label.size = 3) + NoLegend()
#ggsave(filename = paste0(data_dir, "fetal_cleaned_", Sys.Date(), ".png"), device = "png")
High S100A4, CD4 expression and low CCR7, CD8A expression indicates CD4+ Memory T Cell Assignment.
FeaturePlot(named.fetal, features = "S100A4")
VlnPlot(named.fetal, features = c("S100A4" , "CCR7"))
VlnPlot(named.fetal, features = c("CD8A", "CD4"))